# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
An example showing how to generate chat completions from reasoning models
like DeepSeek-R1.

To run this example, you need to start the vLLM server with the reasoning
parser:

```bash
vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
     --reasoning-parser deepseek_r1
```

Unlike openai_chat_completion_with_reasoning.py, this example demonstrates the
streaming chat completions feature.

The streaming chat completions feature allows you to receive chat completions
in real-time as they are generated by the model. This is useful for scenarios
where you want to display chat completions to the user as they are generated
by the model.

Remember to check that `content` and `reasoning_content` exist in each
`ChatCompletionChunk` before using them; either field may be absent, and
accessing it directly can raise an error.
"""

from openai import OpenAI

# Point the OpenAI client at the local vLLM API server: override both the
# API key and the API base URL.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

# Single-turn conversation sent to the model.
messages = [
    {
        "role": "user",
        "content": "9.11 and 9.8, which is greater?",
    },
]


def main():
    """Stream a chat completion from a reasoning model and print it live.

    Creates a client for the server at ``openai_api_base``, picks the first
    model the server lists, and sends ``messages`` with streaming enabled.
    Reasoning text is printed after a ``reasoning_content:`` label and the
    answer text after a ``content:`` label; each label is printed only once.
    """
    client = OpenAI(
        api_key=openai_api_key,
        base_url=openai_api_base,
    )

    # Use whichever model the server lists first.
    models = client.models.list()
    model = models.data[0].id

    # ruff: noqa: E501
    # For granite: add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
    stream = client.chat.completions.create(model=model, messages=messages, stream=True)

    print("client: Start streaming chat completions...")
    printed_reasoning_content = False
    printed_content = False

    for chunk in stream:
        # A streamed chunk may carry no choices at all (for example a
        # usage-only chunk), so skip it rather than index an empty list.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta

        # Safely extract reasoning_content and content from delta,
        # defaulting to None if attributes don't exist or are empty strings
        reasoning_content = getattr(delta, "reasoning_content", None) or None
        content = getattr(delta, "content", None) or None

        if reasoning_content is not None:
            if not printed_reasoning_content:
                # Print the label once, before the first reasoning fragment.
                printed_reasoning_content = True
                print("reasoning_content:", end="", flush=True)
            print(reasoning_content, end="", flush=True)
        elif content is not None:
            if not printed_content:
                # Print the label once, on a new line after any reasoning text.
                printed_content = True
                print("\ncontent:", end="", flush=True)
            # Extract and print the content
            print(content, end="", flush=True)


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
